******** UPDATES *************************************************** 
3/31/08:  COMMENTED OUT VEHICLE PART UNTIL FINISH THIS, ADDED FOOD
          AWAY AND FOOD AT HOME
7/23/08:  CONSTRUCTED DATA FROM DETAILED EXP FILE IN WINDOWS SAS
          AND MERGE THAT WITH THE FAMILY FILES, TO GET UPDATED
          FAMILY SIZE, # OF KIDS, AND EXPENDITURES BY CATEGORY 

11/1/10:  REMOVED TOPCODE FOR RENTAL EQUIV, WHICH WAS BASED ON REAL VALUE OF 1988 TOPCODE
          BEFORE THIS I WAS RECODING ALL MISSING RENTEQ TO 375 OR 399 BECAUSE MIN(RENTEQ,375)...

6/5/19: ADDED EARNINGS AND OCC FOR HEAD, SPOUSE AND OTHERS

*****************************************************************;




******** NOTE *************************************************** 
newid is rebuilt below as 1000000*year + newid, just in case the raw id started
over at some point (an earlier version added 99000000 to newid instead) 
*****************************************************************;

*%include "~/jim/gzipseq.mcr"; 
%include "~/jim/poverty/include/infile_cons.mcr";

LIBNAME flows '~/jim/data/ce7273';
LIBNAME pmeters '~/jim/data';
LIBNAME alldat '~/jim/data';


******* FAMILY FILES **********;

filename fmly7273 pipe 'gunzip -c ~/jim/data/ce7273/09034-0011-Data.txt.gz';

***************************************************************;

* READ THE 1972-73 FAMILY FILE AND CLEAN / RECODE ITS VARIABLES.
  THE INPUT LAYOUT COMES FROM THE MACRO VARIABLE ALL7273 (PRESUMABLY DEFINED
  IN THE INCLUDED INFILE_CONS.MCR, CONFIRM);
DATA fam7273;  
INFILE fmly7273 lrecl=3400; 
INPUT &all7273;

* SURVEY YEAR IS STORED AS AN OFFSET FROM 1970;
year=1970+yr_id;

* PREFIX THE ID WITH THE YEAR, PRESUMABLY SO IDS STAY DISTINCT IF THE RAW ID
  RESTARTS (SEE NOTE AT TOP OF FILE). THE OLD OFFSET VERSION IS KEPT COMMENTED OUT;
newid=1000000*year+newid; 
*newid=99000000+newid;

***************************************************************** 
CLEAN DATA 
*****************************************************************;

* EVERY VARIABLE LISTED HERE IS DIVIDED BY 100 (PRESUMABLY THE RAW FILE STORES
  THEM WITH TWO IMPLIED DECIMALS, CONFIRM AGAINST THE CODEBOOK);
ARRAY decimal
	totexp totfood fdhome fdaway totalc tottobac totshelt totrent totowndwe totothlod
	totutil tottrans totvehic oth_veh tothealth tothexp toteducpriv toteducpub fincbtax
	earnings_oth fedtax statetax fsalaryx fsalaryx2 fsalaryx3
	foodstamp miscelexp lifeins othins retpen cashco
        phone domestic hfurn dry_clean clothing pers_care recreat reading trips_gas trips_trans
	nonincmx veh_fin veh_gas veh_maint veh_pub mealaspay invest_inc ss_inc ret_inc priv_pen 
	oth_inc WELFAREX
; 
DO OVER decimal;
   decimal=decimal/100; 
END;

*COUNT INCOME SOURCES FOR TOPCODE ADJUSTMENT;
* NOTE(REVIEW): THE 9999999 COMPARISONS BELOW RUN AFTER THE DIVISION BY 100
  ABOVE, CONFIRM THE FLAG VALUE IS EXPRESSED IN THE SCALED UNITS;
ARRAY inc_s(1:10) fsalaryx fsalaryx2 fsalaryx3 mealaspay invest_inc ss_inc ret_inc 
		 priv_pen oth_inc WELFAREX; 
ARRAY inc(1:10);
DO t=1 TO 10; 
   IF inc_s(t) GE 9999999 THEN inc(t)=1;
   ELSE inc(t)=0;
END; 
* NUMBER OF INCOME SOURCES FLAGGED ABOVE (VALUE GE 9999999);
num_inc_sources=SUM(OF inc1-inc10);

*RECODE INCOME SOURCES WHEN FINCBTAX IS TOP OR BOTTOM CODED;
* EACH FLAGGED SOURCE IS REPLACED BY AN EQUAL SHARE OF FINCBTAX. THE DIVISOR IS
  AT LEAST 1 WHENEVER THE ASSIGNMENT FIRES BECAUSE THE SAME TEST SET THE COUNT;
ARRAY recode fsalaryx fsalaryx2 fsalaryx3 mealaspay invest_inc ss_inc ret_inc
             priv_pen oth_inc WELFAREX;
DO OVER recode;
   IF recode GE 9999999 THEN recode=fincbtax/num_inc_sources; 
END;

*ASSIGN AVG TAX RATE WHEN TOPCODED (i.e. MEAN FED OR STATE TAX / MEAN FINCBTAX);
   IF fedtax GE 9999999 THEN fedtax=fincbtax*(-0.108); 
   IF statetax GE 9999999 THEN statetax=fincbtax*(-0.018); 

* THE 0/1 FLAGS WERE ONLY NEEDED FOR THE COUNT. THE LOOP INDEX T IS NOT DROPPED
  AND STAYS IN THE OUTPUT DATA SET;
DROP inc1-inc10;
 
tax_inc=SUM(OF fsalaryx fsalaryx2);
 
* FOR FSALARY3 (SELF EMP) WORKER PAYS BOTH PARTS OF FICA; 
* FICA IS ONLY ASSIGNED FOR YEAR 1972 OR 1973 AND IS LEFT MISSING OTHERWISE.
  NOTE(REVIEW): THE 9000 AND 10800 LIMITS ARE APPLIED TO THE COMPUTED TAX AMOUNT,
  NOT TO EARNINGS. IF THEY ARE MEANT TO BE TAXABLE EARNINGS CEILINGS THE CAP
  BELONGS ON TAX_INC AND FSALARYX3 INSTEAD, CONFIRM;
IF year=1972 THEN fica=MIN(MAX(0,tax_inc*0.046)+MAX(0,fsalaryx3*0.092),9000);
   ELSE IF year=1973 THEN fica=MIN(MAX(0,tax_inc*0.0485)+MAX(0,fsalaryx3*0.097),10800);
 
*NOTE, FINCATAX IN LATER YEARS DOES NOT INCLUDE FICA;
*TAXES ARE NEGATIVE VALUES; 
fincatax=fincbtax+fedtax+statetax;
 
* DEFINE INCWEEK1 AND INCWEEK2 AS IN LATER YEARS; 
* This will create missing for those that report weeks=88 and employment status as not reported; 
* INCWEEK2 IS TAKEN FROM FAMWEEK2 ONLY WHEN FAMREL2=2, IT IS 0 WHEN FAMREL2 IS
  NOT 2, AND IT STAYS MISSING IN EVERY CASE NOT LISTED BELOW;
IF famrel2=2 AND 0 LE famweek2 LE 52 THEN incweek2=famweek2;
   ELSE IF famrel2=2 AND famweek2=88 AND emp_stat2=0 THEN incweek2=0;
   ELSE IF famrel2 NE 2 THEN incweek2=0;
 
* There are no obs with incweek1=0, recode those with incweek1=88 to =0 if emp_stat is reported; 
IF incweek1=88 AND emp_stat=0 THEN incweek1=0; 
IF incweek1=88 AND emp_stat=3 THEN incweek1=.;
 
* ASSET VARIABLES ARE DIVIDED BY 100 AND ROUNDED DOWN, ASSETS72 IS THEN CAPPED AT 99000;
assets72=FLOOR(assets72/100); 
IF assets72 GT 99000 THEN assets72=99000; 
chg_ass_lib=FLOOR(chg_ass_lib/100); 
chg_ass=FLOOR(chg_ass/100); 
chg_lib=FLOOR(chg_lib/100);

* in 72-73 data, 8888 is missing and 9999 is 1001 and above; 
* SO 8888 BECOMES MISSING AND ANY OTHER NONMISSING VALUE IS CAPPED AT 1000;
IF renteq=8888 THEN renteq=.;
   ELSE IF renteq NE . THEN renteq=MIN(renteq,1000); 
* Impose top-coding from later years; 
*IF year=1972 THEN renteq=MIN(renteq,375); 
*IF year=1973 THEN renteq=MIN(renteq,399);

* NOTE 1 OBS HAS MARITAL1=0, ITS A 24 YR OLD WITH FAM_SIZE=1 SO
  DENOTE THIS PERSON, MARRIED=0; 
if marital1=1 THEN married=1;
   ELSE married=0; 
* A ROOM COUNT OF 0 IS TREATED AS MISSING;
if rooms=0 then rooms=.;

* AC CODES 1-3 SET CEN_AC=1 AND CODES 4-5 SET IT TO 0. CODE 4 SETS WIN_AC=1 AND
  CODES 1,2,3,5 SET IT TO 0. ANY OTHER AC VALUE LEAVES BOTH FLAGS MISSING;
IF ac IN(1,2,3) THEN cen_ac=1;
   ELSE IF ac IN(4,5) THEN cen_ac=0; 
IF ac=4 THEN win_ac=1;
   ELSE IF ac IN(1,2,3,5) THEN win_ac=0;

* IND_SPOUSE IS CHARACTER HERE. CODE A IS RESET TO A PERIOD SO THAT THE NUMERIC
  CONVERSION IN THE NEXT DATA STEP YIELDS A MISSING VALUE;
IF ind_spouse="A" THEN ind_spouse=".";

RUN;

************************************************************** 
MERGE IN DATA FROM DETAILED EXPENDITURE FILE (SEE DETAIL_EXP7273.SAS IN D:\Jim\CEXdata) 
**************************************************************;

* ATTACH THE DETAILED-EXPENDITURE VARIABLES (D_ PREFIX, PRESUMABLY SUPPLIED BY
  FLOWS.FAM_DET_FINAL7273) TO EACH FAMILY RECORD AND BUILD THE INPUTS FOR THE
  RENTAL EQUIVALENCE REGRESSION. ONLY RECORDS PRESENT IN THE FAMILY FILE ARE KEPT;
DATA fam7273;
   MERGE fam7273 (RENAME=(ind_spouse=temp1) IN=in_fam)
         flows.fam_det_final7273;
   BY newid;
   IF NOT in_fam THEN DELETE;

   * IND_SPOUSE ARRIVES AS CHARACTER, MULTIPLYING BY 1 STORES IT AS A NUMBER;
   ind_spouse=temp1*1;
   DROP temp1;

   ************************************************************** 
   CONSTRUCT A MEASURE OF EXPEND THAT IS DEFINED CONSISTENTLY ACROSS
   YEARS USING DETAILED EXP DATA. THESE CATEGORIES FOLLOW THE CROSS-WALK 
   IN CB8081.DOC 
   **************************************************************; 
   * NOTE, TO BE CONSISTENT WITH LATER YEARS, THIS INCLUDES ALL CATEGORIES EXCEPT
     MISCELLANEOUS EXP AND CASH CONTRIBUTIONS. SUM SKIPS MISSING COMPONENTS; 
   totexp2=SUM(d_fdhome, d_fdaway, d_alcohol, d_rent, d_owndwell, d_othlodge,
               d_utility, d_houseop, d_hfurn, d_clothing, d_vehic, d_gas, d_othvehic,
               d_pubtrans, d_health, d_entertain, d_perscare, d_read, d_educ, d_tobacco,
               d_lifeins, d_retire);

   * FAMILY COMPOSITION IS REPLACED BY THE VALUES FROM THE DETAILED FILE;
   perslt18=d_perslt18;
   fam_size=d_fam_memb;
   persot64=d_persot64;

   ************************************************************** 
   ABOUT 1,000 HOMEOWNERS HAVE MISSING RENTAL EQUIV. SO WE IMPUTE 
   FOLLOWING PROCEDURE USED FOR 1980-81 
   **************************************************************; 
   * TOTAL EXPENDITURE EXCLUDING OWNED DWELLING COSTS;
   totexp_nh=totexp2-d_owndwell;

   * LOGS DEFAULT TO 0 AND ARE ONLY TAKEN FOR STRICTLY POSITIVE VALUES;
   ln_renteq=0;
   IF renteq GT 0 THEN ln_renteq=LOG(renteq);
   ln_houseval=0;
   IF houseval GT 0 THEN ln_houseval=LOG(houseval);
   ln_totexp_nh=0;
   IF totexp_nh GT 0 THEN ln_totexp_nh=LOG(totexp_nh);

   * 0/1 FAMILY TYPE INDICATORS, A COMPARISON EVALUATES TO 1 WHEN TRUE AND 0 OTHERWISE;
   swnk=(fam_size=1 AND married=0 AND sex=2);
   sm  =(fam_size GT 1 AND perslt18 GT 0 AND married=0 AND sex=2);
   mm  =(fam_size GT 1 AND perslt18 GT 0 AND married=1);
RUN;


* REGRESS LOG RENTAL EQUIVALENCE ON LOG HOUSE VALUE, LOG NON-HOUSING SPENDING AND
  FAMILY TYPE. THE SAMPLE IS CUTENURE 1-3 WITH POSITIVE LOG HOUSE VALUE AND
  POSITIVE LOG RENTAL EQUIVALENCE. THE ESTIMATES ARE SAVED TO PMETERS;
proc reg data=fam7273 outest=pmeters.house_param7273;
   where ln_renteq GT 0
     and ln_houseval GT 0
     and cutenure IN(1,2,3);
   model ln_renteq = ln_houseval ln_totexp_nh fam_size sm mm swnk;
run;

************************************************************** 
This generates the predicted values of renteq (in logs) for every
record using the parameters generated from the regression above 
**************************************************************; 
proc score data=fam7273
           score=pmeters.house_param7273
           type=PARMS
           out=predhome;
   id newid;
   var ln_houseval ln_totexp_nh fam_size sm mm swnk;
run;

* QUICK LOOK AT THE SCORED VALUES;
proc means data=predhome;
run;

* BOTH SIDES OF THE UPCOMING MERGE ARE PUT IN NEWID ORDER;
proc sort data=predhome;
   by newid;
run;
proc sort data=fam7273;
   by newid;
run;

* ADD THE PREDICTED LOG RENTAL EQUIVALENCE AND BUILD THE ANNUAL HOUSING FLOW.
  ONLY RECORDS ALREADY IN FAM7273 ARE KEPT;
DATA fam7273;
   MERGE fam7273 (IN=in_fam)
         predhome (KEEP=newid model1 RENAME=(model1=ln_renteq_pred));
   BY newid;
   IF NOT in_fam THEN DELETE;

   * RENTEQ2 IS THE REPORTED VALUE, EXCEPT THAT CUTENURE 1-3 RECORDS WITH A
     MISSING REPORT GET THE EXPONENTIATED PREDICTION;
   renteq2=renteq;
   IF renteq=. AND cutenure IN (1,2,3) THEN renteq2=EXP(ln_renteq_pred);

   * HFLOW IS 12 TIMES THE MONTHLY RENTAL EQUIVALENCE WHERE ONE IS USED, 0 FOR
     CUTENURE 4, AND D_OWNDWELL FOR CUTENURE 5-6 WITH RENTEQ OF 0. ANY OTHER
     COMBINATION LEAVES HFLOW MISSING;
   SELECT;
      WHEN (cutenure=4)                          hflow=0;
      WHEN (cutenure IN (1,2,3))                 hflow=renteq2*12;
      WHEN (cutenure IN (5,6) AND renteq GT 0)   hflow=renteq2*12;
      WHEN (cutenure IN (5,6) AND renteq=0)      hflow=d_owndwell;
      OTHERWISE;
   END;
RUN;

* SUMMARY STATISTICS FOR THE DATA SET JUST WRITTEN;
PROC MEANS DATA=fam7273;
RUN;

************************************************************************** 
BRING IN DATA ON VEHICLE FLOWS 
**************************************************************************;

* THE VEHICLE COUNT FILE IS SORTED IN PLACE BY NEWID SO IT CAN BE MERGED BELOW;
PROC SORT DATA=flows.num_cars_trucks7273;
   BY newid; 
RUN;


**************************************************************************;

* MERGE THE VEHICLE COUNTS ONTO THE FAMILY RECORDS (FAMILY RECORDS ONLY) AND
  BUILD THE REGRESSORS AND THE DEPENDENT VARIABLE FOR THE CAR VALUE REGRESSION;
DATA famstep7273;
   MERGE fam7273(IN=in1) flows.num_cars_trucks7273;
   BY newid;
   IF in1;

* USE VEHQ INSTEAD OF NUM_CARS B/C IT APPEARS MANY VEHICLES THAT
  ARE CARS IN THE DETAILED FILE ARE CATEGORIZED AS OTHER.  IN CASES WHERE MORE
  AUTOS ARE REPORTED IN THE DETAILED VEHICLE DATA WE TAKE THAT NUMBER, B/C VEHQ
  IS TOPCODED AT 2; 
* FAMILIES WITHOUT A RECORD IN THE VEHICLE FILE GET COUNTS OF 0;
IF num_cars_trucks=. THEN num_cars_trucks=0; 
IF num_cars=. THEN num_cars=0; 
tot_cars=MAX(num_cars,vehq);
    
* SPENDING NET OF AUTO PURCHASES, ITS SQUARE, AND GUARDED LOGS (0 WHEN THE
  ARGUMENT IS NOT STRICTLY POSITIVE);
totexp3=totexp2-d_autos; 
totexp3_2=totexp3*totexp3; 
IF totexp3 GT 0 THEN ln_totexp3=log(totexp3);
   ELSE ln_totexp3=0; 
IF fincbtax GT 0 THEN ln_fincbtax=log(fincbtax);
   ELSE ln_fincbtax=0;
    
* SEX IS CODED 1/2, SHIFT IT TO A 0/1 DUMMY;
sex_dum=sex-1;
    
* COLLAPSE EDUC_REF INTO FOUR GROUPS, ED_TYPE STAYS MISSING FOR ANY OTHER CODE;
IF educ_ref IN(1,2,6) THEN ed_type=1;
           ELSE IF educ_ref=3 THEN ed_type=2;
           ELSE IF educ_ref=4 THEN ed_type=3;
           ELSE IF educ_ref=5 THEN ed_type=4;
 
* ED1-ED4 ARE 0/1 DUMMIES FOR THE FOUR GROUPS (ALL 0 WHEN ED_TYPE IS MISSING);
ARRAY ed(1:4);
  DO t=1 TO 4;
     IF ed_type=t THEN ed(t)=1;
       ELSE ed(t)=0; END;

*CALCULATE SPENDING PER CAR PURCHASED IN SURVEY YEAR; 
*7273 VEH PURCHASE DATA ARE NET OF TRADE-IN COST, BUT IN 80-05 WE PREDICT
 USING THE GROSS PURCH PRICE, SO ADJUST PURCHASE VALUE BY 1.134 WHICH IS 1 PLUS THE
 RATIO OF MEAN TRADE IN VALUE TO MEAN NET VALUE;  * RAN; 
   /*
   proc means DATA=carslib.master3;
   where vehicyb=100 AND 19801 LE qyear LE 19804;
   var netpurx totpurx;
   run; 
   */

* PER_CAR IS COMPUTED WHENEVER A PURCHASE IS REPORTED. BECAUSE THE PURCHASE
  AMOUNT IS NET OF TRADE-IN IT CAN BE ZERO OR NEGATIVE, SO THE LOG IS ONLY TAKEN
  FOR A STRICTLY POSITIVE PER_CAR. LN_PER_CAR IS MISSING OTHERWISE, WHICH IS THE
  SAME VALUE AS BEFORE BUT WITHOUT INVALID-ARGUMENT ERRORS IN THE LOG;
IF num_cars_purch GT 0 THEN DO;
   per_car=(d_autos/num_cars_purch)*1.134; 
   IF per_car GT 0 THEN ln_per_car=log(per_car);
END;
    
RUN;
    
* SUMMARY STATISTICS FOR THE VARIABLES USED IN THE CAR VALUE REGRESSION;
PROC MEANS DATA=famstep7273;
   VAR d_vehic d_autos ln_per_car per_car vehq tot_cars ln_totexp3 totexp3
       totexp3_2 age_ref ln_fincbtax fam_size sex_dum ed1 ed2 ed3;
RUN;

* LOG SPENDING PER PURCHASED CAR REGRESSED ON FAMILY CHARACTERISTICS, USING ONLY
  FAMILIES THAT REPORT A PURCHASE. ESTIMATES GO TO PMETERS, FITTED VALUES AND
  RESIDUALS GO TO REG_OUT2;
PROC REG DATA=famstep7273 OUTEST=pmeters.car_param7273;
   WHERE num_cars_purch GT 0;
   MODEL ln_per_car = ln_totexp3 age_ref ln_fincbtax fam_size sex_dum ed1 ed2 ed3;
   OUTPUT OUT=reg_out2 R=resid P=yhat;
RUN;

* ADJUST FOR FACT THAT GENERATING PREDICTED VALUES FOR LOGS (SEE BABY WOOLDRIDGE P. 208); 
* BOTH THE FITTED AND THE ACTUAL LOG VALUES ARE EXPONENTIATED, THEN THE ACTUAL
  LEVEL IS REGRESSED ON THE FITTED LEVEL WITH NO INTERCEPT. THE SLOPE ON M_HAT
  IS SAVED IN ALPHA2;
DATA alpha_est (KEEP=m_hat y);
   SET reg_out2 (KEEP=yhat ln_per_car);
   m_hat=EXP(yhat);
   y=EXP(ln_per_car);
RUN;

PROC REG DATA=alpha_est OUTEST=alpha2;
   MODEL y = m_hat / NOINT;
RUN;
     
* SCORE EVERY FAMILY WITH THE CAR VALUE REGRESSION, THE SCORE VARIABLE IS MODEL1;
proc score data=famstep7273
           score=pmeters.car_param7273
           type=PARMS
           out=predveh;
   id newid;
   var ln_totexp3 age_ref ln_fincbtax fam_size sex_dum ed1 ed2 ed3;
run;

* THE M_HAT COEFFICIENT FROM ALPHA2 IS READ ONCE ON THE FIRST ITERATION AS
  ALPHA_ADJ2. VALUES READ BY SET ARE RETAINED, SO IT IS AVAILABLE ON EVERY ROW.
  MODEL2 IS THE EXPONENTIATED SCORE TIMES THAT FACTOR;
data predveh (keep=newid model2 model1);
   if _N_=1 then
      set alpha2 (keep=m_hat rename=(m_hat=alpha_adj2));
   set predveh;
   model2=EXP(model1)*alpha_adj2;
run;

proc means data=predveh;
run;

* NEWID ORDER FOR THE MERGE BACK ONTO THE FAMILY FILE;
proc sort data=predveh;
   by newid;
run;

proc means data=predveh;
run;

* ADD THE PREDICTED CAR VALUES TO THE FAMILY RECORDS;
DATA famstep7273;
   MERGE famstep7273
         predveh;
   BY newid;

   * generate purchase identifier to calculate depreciation and avg life.
     THE COMPARISON IS 1 WHEN D_AUTOS IS POSITIVE AND 0 OTHERWISE, INCLUDING
     WHEN D_AUTOS IS MISSING;
   car_purch=(d_autos GT 0);
RUN;

* WEIGHTED SHARE OF FAMILIES WITH A CAR PURCHASE, ONE ROW PER YEAR IN FREQ_PURCH;
PROC SORT DATA=famstep7273;
   BY year;
RUN;

PROC MEANS DATA=famstep7273;
   BY year;
   WEIGHT finlwt;
   VAR car_purch;
   OUTPUT OUT=freq_purch MEAN=frac_car_purch;
RUN;

* FINAL FAMILY FILE: ADDS THE YEARLY PURCHASE FRACTION, THE VEHICLE SERVICE
  FLOWS, THE TOTAL CONSUMPTION FLOW AND THE POVERTY CUTOFF;
DATA alldat.famstep7273;
   MERGE famstep7273
         freq_purch (KEEP=year frac_car_purch);
   BY year;

   * IF REPORT PURCHASE, THEN JUST TAKE THE SCALED UP PURCHASE PRICE TIMES DEPRECIATION RATE,
     OTHERWISE WE TAKE THE PREDICTED PURCHASE VALUE TIMES AVG AGE, WHICH IS THE MEAN TIME A FAMILY
     HAS OWNED AN AUTO IN THE 1980 SURVEY (3.51 YEARS)--IN WINDOWS, RAN;
   /*
   DATA temp;
   set carslib.master3;
   IF vehicyb=100 AND 19801 LE qyear LE 19804;
   IF own_for GE 180 THEN own_for=180;
   run;
   proc means DATA=temp;
   where vehicyb=100 AND 19801 LE qyear LE 19804;
   var own_for qyear;
   run;
   */

   * TAKE THE DEPRECIATION RATE FOR 80-05 AND SCALE IT UP BY 1.2613, WHICH IS THE RATIO OF THE
     FRACTION OF SAMPLE THAT HAVE POSITIVE SPENDING ON CAR PURCHASES IN THE PAST YEAR FOR 72-73
     (34%) TO 80 (27%);
   delta72=0.1459*1.2613;
   avg_age=3.51;

   * SCALE UP TOT_CARS BY THE MEAN # OF CARS OWNED IN 80-81 CONDITIONAL ON HAVING AT LEAST
     2 CARS: 2.39;
   IF tot_cars=2 THEN tot_cars=2.39;

   * PURCHASED CARS USE THE REPORTED PER-CAR VALUE, ANY REMAINING CARS USE THE
     PREDICTED VALUE DEPRECIATED OVER THE AVERAGE HOLDING PERIOD. WITHOUT A
     REPORTED PURCHASE ALL CARS USE THE PREDICTED VALUE;
   IF num_cars_purch GT 0 THEN DO;
      vflow_pred=per_car*MIN(num_cars_purch,tot_cars)*delta72
                 +model2*MAX(tot_cars-num_cars_purch,0)*(delta72)*(1-delta72)**(avg_age);
   END;
   ELSE DO;
      vflow_pred=model2*tot_cars*(delta72)*(1-delta72)**(avg_age);
   END;

   * ALTERNATIVE FLOW: ONE EIGHTH OF THE PREDICTED VALUE PER CAR;
   vflow_katz=tot_cars*model2/8;

   * TOTAL FLOW: SPENDING PLUS VEHICLE AND HOUSING FLOWS, LESS THE LISTED
     SPENDING CATEGORIES. PLAIN ARITHMETIC, SO ANY MISSING TERM MAKES TFLOW MISSING;
   tflow=(totexp2+vflow_pred+hflow
          -d_owndwell
          -d_autos
          -d_autofin
          -d_health
          -d_educ
          -d_retire);

   srv_year=year-1900;
   Ref_year=year;

   * POVERTY CUTOFF = BASE THRESHOLD FOR THE FAMILY SIZE (SPLIT AT AGE 65 FOR
     SIZES 1 AND 2) TIMES A YEAR FACTOR. THE SAME BASE TABLE IS USED FOR BOTH
     YEARS, ONLY THE FACTOR DIFFERS. _POV_DEFL AND _POV_BASE ARE WORK VARIABLES
     AND ARE DROPPED;
   IF ref_year=1972 THEN _pov_defl=0.4197;
   ELSE IF ref_year=1973 THEN _pov_defl=0.4458;

   SELECT;
      WHEN (fam_size=1 AND age_ref < 65)    _pov_base=5180;
      WHEN (fam_size=1 AND age_ref GE 65)   _pov_base=4775;
      WHEN (fam_size=2 AND age_ref < 65)    _pov_base=6863;
      WHEN (fam_size=2 AND age_ref GE 65)   _pov_base=6837;
      WHEN (fam_size=3)                     _pov_base=8022;
      WHEN (fam_size=4)                     _pov_base=10133;
      WHEN (fam_size=5)                     _pov_base=11701;
      WHEN (fam_size=6)                     _pov_base=13056;
      WHEN (fam_size=7)                     _pov_base=14315;
      WHEN (fam_size=8)                     _pov_base=16244;
      WHEN (fam_size GE 9)                  _pov_base=19249;
      OTHERWISE;
   END;

   * ONLY ASSIGNED WHEN BOTH A YEAR FACTOR AND A BASE THRESHOLD WERE FOUND;
   IF NOT MISSING(_pov_base) AND NOT MISSING(_pov_defl)
      THEN Pov_cut=_pov_base*_pov_defl;

   DROP ed_type _pov_base _pov_defl;

RUN;

* THE FINAL DATA SET IS SORTED IN PLACE BY REFERENCE YEAR, THEN SUMMARIZED
  OVERALL AND BY YEAR;
proc sort data=alldat.famstep7273;
   by ref_year;
run;

proc means data=alldat.famstep7273;
run;

proc means data=alldat.famstep7273;
   by ref_year;
run;


***************************************************************************
EXPORT DATA AS STATA DATA FILE FOR COMPARISON
***************************************************************************;

* NO DBMS= OPTION IS GIVEN, SO THE OUTPUT FORMAT RESTS ON THE .DTA EXTENSION;
proc export data=alldat.famstep7273
            outfile="~/jim/data/data_for_others/famstep7273.dta"
            replace;
run;




*%gzipsq(work,famstep7273,~/jim/data);


